# Base directory of the network results; the path keeps its trailing slash.
net_dir <- "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/3rd_pass/snakemake-sn/results/"
Clusters with gene_names
# Macro structure analysed in this report, taken from the document parameters.
# It can be a cell type, metabolites, or a region of the brain.
macro_type <- params$cell_type
message(paste0("Cell type: ", macro_type))
## Cell type: ast

# Size threshold (number of nodes) applied to the per-cluster counts.
min_clust <- 30

# Read the gene-by-cluster table for the selected macro structure.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
cluster_codes_df <- read.table(
  paste0(net_dir, "/", macro_type, "/geneBycluster.txt"),
  header = TRUE,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
message(paste0("Number of unique genes: ", length(unique(cluster_codes_df$ensembl))))
## Number of unique genes: 16383
createDT(cluster_codes_df)
Number of genes by cluster
Clusters level 1
# Number of rows (genes) per level-1 cluster.
count1 <- as.data.frame(table(cluster_codes_df$cluster_lv1))
colnames(count1) <- c("cluster", "n_nodes")
total_nodes <- sum(count1$n_nodes)
# Genes that sit in clusters with at least `min_clust` nodes.
nodes_in_cluster <- sum(count1$n_nodes[count1$n_nodes >= min_clust])
# The cluster count uses the same inclusive `>=` threshold as the gene total
# above (it previously used a strict `>`, which skipped clusters with exactly
# `min_clust` nodes). The threshold in the text now comes from `min_clust`.
# NOTE(review): the recorded output line below predates this fix; re-run to confirm.
message("Number of clusters with at least ", min_clust, " nodes: ", sum(count1$n_nodes >= min_clust))
## Number of clusters with at least 30 nodes: 3
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ", nodes_in_cluster,
  ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 16398. Percentage: 100% of the genes are assigned to a cluster.
Clusters level 2
# Number of rows (genes) per level-2 cluster.
count2 <- as.data.frame(table(cluster_codes_df$cluster_lv2))
colnames(count2) <- c("cluster", "n_nodes")
total_nodes <- sum(count2$n_nodes)
# Genes that sit in clusters with at least `min_clust` nodes.
nodes_in_cluster <- sum(count2$n_nodes[count2$n_nodes >= min_clust])
# The threshold shown in the messages is taken from `min_clust` rather than a
# hard-coded "30", so the text stays correct if the threshold is changed.
message("Number of clusters with at least ", min_clust, " nodes: ", sum(count2$n_nodes >= min_clust))
## Number of clusters with at least 30 nodes: 8
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ", nodes_in_cluster,
  ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 16375. Percentage: 99.8597389925601% of the genes are assigned to a cluster.
Clusters level 3
# Number of rows (genes) per level-3 cluster.
count3 <- as.data.frame(table(cluster_codes_df$cluster_lv3))
colnames(count3) <- c("cluster", "n_nodes")
total_nodes <- sum(count3$n_nodes)
# Genes that sit in clusters with at least `min_clust` nodes.
nodes_in_cluster <- sum(count3$n_nodes[count3$n_nodes >= min_clust])
# The threshold shown in the messages is taken from `min_clust` rather than a
# hard-coded "30", so the text stays correct if the threshold is changed.
message("Number of clusters with at least ", min_clust, " nodes: ", sum(count3$n_nodes >= min_clust))
## Number of clusters with at least 30 nodes: 26
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ", nodes_in_cluster,
  ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 16346. Percentage: 99.6828881570923% of the genes are assigned to a cluster.
Clusters level 4
# Number of rows (genes) per level-4 cluster.
count4 <- as.data.frame(table(cluster_codes_df$cluster_lv4))
colnames(count4) <- c("cluster", "n_nodes")
total_nodes <- sum(count4$n_nodes)
# Genes that sit in clusters with at least `min_clust` nodes.
nodes_in_cluster <- sum(count4$n_nodes[count4$n_nodes >= min_clust])
# The threshold shown in the messages is taken from `min_clust` rather than a
# hard-coded "30", so the text stays correct if the threshold is changed.
message("Number of clusters with at least ", min_clust, " nodes: ", sum(count4$n_nodes >= min_clust))
## Number of clusters with at least 30 nodes: 76
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ", nodes_in_cluster,
  ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 15722. Percentage: 95.8775460422003% of the genes are assigned to a cluster.
Clusters level 5
# Number of rows (genes) per level-5 cluster.
count5 <- as.data.frame(table(cluster_codes_df$cluster_lv5))
colnames(count5) <- c("cluster", "n_nodes")
total_nodes <- sum(count5$n_nodes)
# Genes that sit in clusters with at least `min_clust` nodes.
nodes_in_cluster <- sum(count5$n_nodes[count5$n_nodes >= min_clust])
# The threshold shown in the messages is taken from `min_clust` rather than a
# hard-coded "30", so the text stays correct if the threshold is changed.
message("Number of clusters with at least ", min_clust, " nodes: ", sum(count5$n_nodes >= min_clust))
## Number of clusters with at least 30 nodes: 162
message(
  "Number of genes assigned in clusters with at least ", min_clust, " nodes: ", nodes_in_cluster,
  ". Percentage: ", (nodes_in_cluster / total_nodes) * 100,
  "% of the genes are assigned to a cluster."
)
## Number of genes assigned in clusters with at least 30 nodes: 12168. Percentage: 74.2041712403952% of the genes are assigned to a cluster.
Session info
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggeasy_0.1.3 readxl_1.3.1 kableExtra_1.3.4 R.matlab_3.6.2 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4 readr_2.1.2 tidyr_1.2.0
## [11] tibble_3.1.6 tidyverse_1.3.1 limma_3.50.1 ggfortify_0.4.14 ggplot2_3.3.5
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.8 svglite_2.1.0 lubridate_1.8.0 assertthat_0.2.1 digest_0.6.29 utf8_1.2.2 R6_2.5.1 cellranger_1.1.0 backports_1.4.1
## [10] reprex_2.0.1 evaluate_0.15 httr_1.4.2 pillar_1.7.0 rlang_1.0.1 rstudioapi_0.13 jquerylib_0.1.4 R.utils_2.11.0 R.oo_1.24.0
## [19] DT_0.20 rmarkdown_2.11 webshot_0.5.2 htmlwidgets_1.5.4 munsell_0.5.0 broom_0.7.12 compiler_4.1.2 modelr_0.1.8 xfun_0.29
## [28] systemfonts_1.0.4 pkgconfig_2.0.3 htmltools_0.5.2 tidyselect_1.1.2 gridExtra_2.3 viridisLite_0.4.0 fansi_1.0.2 crayon_1.5.0 tzdb_0.2.0
## [37] dbplyr_2.1.1 withr_2.4.3 R.methodsS3_1.8.1 grid_4.1.2 jsonlite_1.7.3 gtable_0.3.0 lifecycle_1.0.1 DBI_1.1.2 magrittr_2.0.2
## [46] scales_1.1.1 cli_3.2.0 stringi_1.7.6 fs_1.5.2 xml2_1.3.3 bslib_0.3.1 ellipsis_0.3.2 generics_0.1.2 vctrs_0.3.8
## [55] tools_4.1.2 glue_1.6.1 crosstalk_1.2.0 hms_1.1.1 fastmap_1.1.0 yaml_2.3.5 colorspace_2.0-3 rvest_1.0.2 knitr_1.37
## [64] haven_2.4.3 sass_0.4.0